",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=t.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=t.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),t.elements=c+" "+a,j(b)}function f(a){var b=s[a[q]];return b||(b={},r++,a[q]=r,s[r]=b),b}function g(a,c,d){if(c||(c=b),l)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():p.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||o.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),l)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return t.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(t,b.frag)}function j(a){a||(a=b);var d=f(a);return!t.shivCSS||k||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),l||i(a,d),a}var k,l,m="3.7.2",n=a.html5||{},o=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,p=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,q="_html5shiv",r=0,s={};!function(){try{var a=b.createElement("a");a.innerHTML="",k="hidden"in a,l=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){k=!0,l=!0}}();var t={elements:n.elements||"abbr 
article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:m,shivCSS:n.shivCSS!==!1,supportsUnknownElements:l,shivMethods:n.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=t,j(b)}(this,document); };

Themes

  1. The reproducibility ‘crisis’
  2. What does reproducible science look like?
  3. Using R & RStudio for R-eproducible science
  4. Advanced topics in R-eproducible science
  5. The future of reproducible psychological science

The reproducibility crisis

I like to start off talks about reproducibility in science with some humor. This video is a few years old, but it has some timeless insights.

https://www.youtube.com/embed/66oNv_DJuPc

What’s the point? That even the most well-meaning of us can make careless errors that undermine the reproducibility of science.

But, is it a crisis?

In 2016, Nature published the results of a survey of 1,500 scientists (Baker 2016). They were asked a number of questions, including the following:

Is there a reproducibility crisis?

  • Yes, a significant crisis
  • Yes, a slight crisis
  • No crisis
  • Don’t know

Do we all agree?

# Plot how survey respondents answered the "Is there a reproducibility crisis?" question.
# Run the R file named by the `supporting_functions` document parameter.
# NOTE(review): presumably this is what makes `%>%`, `aes()`, `geom_bar()` and
# `scale_y_discrete()` available unqualified below -- confirm.
source(params$supporting_functions)
# Read the survey responses from the CSV file named by the `survey_data_fn` parameter.
survey <- readr::read_csv(params$survey_data_fn)

# Convert `crisis` to an ordered factor with its levels in the listed order.
survey$crisis <- ordered(survey$crisis, c("Significant", "Slight", "No crisis", "Don't know"))

# Drop rows with a missing `crisis` value, then draw a bar chart of response
# counts with one bar (and one fill colour) per `crisis` level.
survey %>%
  dplyr::filter(., !is.na(crisis)) %>%
  ggplot2::ggplot(.) +
  aes(x = crisis, fill = crisis) +
  geom_bar(stat = "count") +
  # NOTE(review): the y axis holds numeric counts; confirm that a discrete
  # scale (rather than scale_y_continuous) is intended here.
  scale_y_discrete(breaks = c(0, 5, 10, 15), limits = 0:15)

Problems with reproducibility extend beyond behavioral science

Why is reproducibility hard?

Notice that the ‘New R Markdown’ window lets us choose different types of documents, presentations, an interactive web application using Shiny, or another file from some template. We’ll just use the defaults and create a new Untitled document that produces an HTML file as its default output.

The template shows us the core components of an R Markdown file:

Body text

The body text starts with the double hash marks ##. R Markdown follows Markdown’s convention of using hash marks to specify heading levels as in an outline. One hashmark means the 1st or top level. Two hashmarks means the 2nd level, etc.3

Note that we can include clickable web links by surrounding URLs with angle brackets <>, make text boldface by surrounding it with double asterisks **boldface**, or italicize it with single asterisks *italics*.

R Markdown allows other kinds of content to be inserted in body text:

  • Named links
  • Images: rawr
  • Equations: \(e = mc^2\)

and even video or audio recordings using HTML.

Code chunks

Code chunks are separated from the body text by triple back-ticks ‘```’.

Let’s look at the second code chunk:

Text in brackets {r cars} tells R that this chunk contains code written in R 4 and gives the chunk the name cars. The name is optional, and must be unique within a file, but it can help in debugging a long R Markdown file. In this case, the chunk runs the summary() command on the cars dataset.

When you create your own R Markdown documents, you will put your R code inside a chunk. You can create new chunks by clicking on a blank line in the R Markdown document and typing CTRL+ALT+I.

The virtue of putting code in chunks is that you can run them piece by piece from within the document. For example, clicking on the small right arrow icon runs the current chunk.

Scrolling down to the next code chunk, we see that it plots data from the pressure dataset: plot(pressure).

Returning to the first chunk called setup, we see that chunks themselves can have options that specify whether or not they are displayed echo=FALSE in the document, whether or not chunks are evaluated eval=TRUE and so forth. This allows the user to customize how the document executes each chunk. See the RStudio documentation for more information about what chunk options suit your needs.

Rendering output

Edit the body text if you like, then render the document using the Knit button. If we have not saved the file, we may be prompted for a file name; you can use test.Rmd for now. This will generate a test.html file (per the output: html_document in the header). Let’s open that file. We can see that the document combines the body text, links, R code chunks and R code outputs, including plots, in a very readable way.

One of the virtues of R Markdown, of course, is that we could produce different output formats for the same file, either by changing the output field in the document header or by issuing a command in the console:

# Render test.Rmd to a single output format, chosen from the console
# rather than from the document header: first PDF, then Word.
rmarkdown::render("test.Rmd", 
                  output_format = 'pdf_document')
rmarkdown::render("test.Rmd", 
                  output_format = 'word_document')

# More than one output_format
# (pass a character vector to request several formats in one call)
rmarkdown::render("test.Rmd", 
                  output_format = c('html_document',
                                    'pdf_document',
                                    'word_document'))

R Markdown using 2019 R bootcamp data

We can use an R Markdown document bootcamp-survey.Rmd to analyze the survey data. Let’s open it up and see how it looks.

The default format is an html_document, and I’ve added some additional parameters in the header to produce a table of contents (toc: TRUE) with numbered sections (number_section: TRUE), shown as a ‘floating’ menu-like table of contents via toc_float: TRUE.

# Output settings from the header of bootcamp-survey.Rmd: an HTML document
# with a floating table of contents and numbered sections.
output:
  html_document:
    toc: TRUE
    toc_depth: 3
    toc_float: TRUE
    number_section: TRUE

I’ve also added parameters so I can easily produce outputs in different formats. Notice that I’ve added comments about what I did and why, so that the R Markdown file is like a combination lab notebook and data report. And by creating it in R Markdown, I can satisfy many audiences with different needs.

Your adviser likes PDFs? No problem. Your collaborator prefers MS Word? Got it covered. Need to give a quick brown bag talk you can give from any web browser? Easy. R Markdown can become one of your super-powers.

More about R Markdown

There is a book called R Markdown: The Definitive Guide by Yihui Xie, J. J. Allaire, and Garrett Grolemund. Here is a link to the e-version of the book: https://bookdown.org/yihui/rmarkdown/.

Why write reproducible papers/reports in R Markdown?

The previous example showed how we might create reproducible data analysis reports in R Markdown. It’s only a short step to writing full papers this way. But let’s talk about why we might want to do this.

The following section is copied verbatim from Mike Frank & Chris Hartgerink’s tutorial on GitHub.

There are three reasons to write reproducible papers. To be right, to be reproducible, and to be efficient. There are more, but these are convincing to us. In more depth:

To avoid errors. Using an automated method for scraping APA-formatted stats out of PDFs, [@Nuijten2015-ul] found that over 10% of p-values in published papers were inconsistent with the reported details of the statistical test, and 1.6% were what they called “grossly” inconsistent, e.g. difference between the p-value and the test statistic meant that one implied statistical significance and the other did not. Nearly half of all papers had errors in them.

To promote computational reproducibility. Computational reproducibility means that other people can take your data and get the same numbers that are in your paper. Even if you don’t have errors, it can still be very hard to recover the numbers from published papers because of ambiguities in analysis. Creating a document that literally specifies where all the numbers come from in terms of code that operates over the data removes all this ambiguity.

To create spiffy documents that can be revised easily. This is actually a really big neglected one for us. At least one of us used to tweak tables and figures by hand constantly, leading to a major incentive never to rerun analyses because it would mean re-pasting and re-illustratoring all the numbers and figures in a paper. That’s a bad thing! It means you have an incentive to be lazy and to avoid redoing your stuff. And you waste tons of time when you do. In contrast, with a reproducible document, you can just rerun with a tweak to the code. You can even specify what you want the figures and tables to look like before you’re done with all the data collection (e.g., for purposes of preregistration or a registered report).

Advanced topics in R-eproducible science

There is so much more to say about how to do R-eproducible science. Here are some advanced topics we can spend a few minutes discussing, depending on your level of interest. These materials will remain available to you for later reference.

Version control

Track changes is great, right? But if you’ve ever written a lengthy document with other people, you’ve experienced the challenge of tracking versions across time. At some point, the changes become too extensive to track, and so the author(s) decide to accept or reject a bunch and create a new version. This is how version control becomes an extension of the track changes problem. Most of us have experienced something like this sequence: ‘paper.docx’, ‘paper_new.docx’, ‘paper_new_new.docx’, ‘paper_new_new_ROG.docx’, etc.

My current scheme with colleagues is something like this: ‘nsf_grant_2018-08-16v1.docx’, ‘nsf_grant_2018-08-16v2.docx’, etc. That is, each person who modifies the document saves it as a new version. It doesn’t avoid conflicts if we’re working in parallel, but it does help us track down where we went astray.

Imagine a scheme for doing this automatically with your R and RStudio files. RStudio incorporates two ‘version control’ systems from the software development world, ‘git’ and ‘subversion’. I use ‘git’ and a web-based service for managing projects that use git called GitHub.

Rick’s GitHub workflow

We don’t have time to go into git and GitHub here, but I strongly recommend Jenny Bryan’s tutorial Happy Git and GitHub for the useR. In the meantime, this is the workflow I use for almost every project I do that will involve R:

  1. Create a repo on GitHub
  2. Copy repo URL
  3. File/New Project.../
  4. Version Control, Git
  5. Paste repo URL
  6. Select local name for repo and directory where it lives.
  7. Open project within RStudio: File/Open Project...
  8. Commit (upload a commented version) early & often

These videos show this workflow in action.